{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. 获取数据\n",
    "# 2. 计算相似度\n",
    "# 3. 进行评分估计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1. 获取数据\n",
    "rating = pd.read_csv('../../../../../data/ml-latest-small/ratings.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(100836, 4)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rating.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>userId</th>\n",
       "      <th>movieId</th>\n",
       "      <th>rating</th>\n",
       "      <th>timestamp</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4.0</td>\n",
       "      <td>964982703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>964981247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4.0</td>\n",
       "      <td>964982224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>47</td>\n",
       "      <td>5.0</td>\n",
       "      <td>964983815</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>50</td>\n",
       "      <td>5.0</td>\n",
       "      <td>964982931</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   userId  movieId  rating  timestamp\n",
       "0       1        1     4.0  964982703\n",
       "1       1        3     4.0  964981247\n",
       "2       1        6     4.0  964982224\n",
       "3       1       47     5.0  964983815\n",
       "4       1       50     5.0  964982931"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rating.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>movieId</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>193565</th>\n",
       "      <th>193567</th>\n",
       "      <th>193571</th>\n",
       "      <th>193573</th>\n",
       "      <th>193579</th>\n",
       "      <th>193581</th>\n",
       "      <th>193583</th>\n",
       "      <th>193585</th>\n",
       "      <th>193587</th>\n",
       "      <th>193609</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>userId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>606</th>\n",
       "      <td>2.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2.5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>607</th>\n",
       "      <td>4.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>608</th>\n",
       "      <td>2.5</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>609</th>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>4.0</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>610</th>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>610 rows × 9724 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "movieId  1       2       3       4       5       6       7       8       \\\n",
       "userId                                                                    \n",
       "1           4.0     NaN     4.0     NaN     NaN     4.0     NaN     NaN   \n",
       "2           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "3           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "4           NaN     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "5           4.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "...         ...     ...     ...     ...     ...     ...     ...     ...   \n",
       "606         2.5     NaN     NaN     NaN     NaN     NaN     2.5     NaN   \n",
       "607         4.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "608         2.5     2.0     2.0     NaN     NaN     NaN     NaN     NaN   \n",
       "609         3.0     NaN     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "610         5.0     NaN     NaN     NaN     NaN     5.0     NaN     NaN   \n",
       "\n",
       "movieId  9       10      ...  193565  193567  193571  193573  193579  193581  \\\n",
       "userId                   ...                                                   \n",
       "1           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "2           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "3           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "4           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "5           NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "...         ...     ...  ...     ...     ...     ...     ...     ...     ...   \n",
       "606         NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "607         NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "608         NaN     4.0  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "609         NaN     4.0  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "610         NaN     NaN  ...     NaN     NaN     NaN     NaN     NaN     NaN   \n",
       "\n",
       "movieId  193583  193585  193587  193609  \n",
       "userId                                   \n",
       "1           NaN     NaN     NaN     NaN  \n",
       "2           NaN     NaN     NaN     NaN  \n",
       "3           NaN     NaN     NaN     NaN  \n",
       "4           NaN     NaN     NaN     NaN  \n",
       "5           NaN     NaN     NaN     NaN  \n",
       "...         ...     ...     ...     ...  \n",
       "606         NaN     NaN     NaN     NaN  \n",
       "607         NaN     NaN     NaN     NaN  \n",
       "608         NaN     NaN     NaN     NaN  \n",
       "609         NaN     NaN     NaN     NaN  \n",
       "610         NaN     NaN     NaN     NaN  \n",
       "\n",
       "[610 rows x 9724 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 1.1 查看用户和电影的透视图\n",
    "user_movie_pivot = rating.pivot_table(index='userId', columns='movieId', values='rating')\n",
    "user_movie_pivot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>userId</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "      <th>10</th>\n",
       "      <th>...</th>\n",
       "      <th>601</th>\n",
       "      <th>602</th>\n",
       "      <th>603</th>\n",
       "      <th>604</th>\n",
       "      <th>605</th>\n",
       "      <th>606</th>\n",
       "      <th>607</th>\n",
       "      <th>608</th>\n",
       "      <th>609</th>\n",
       "      <th>610</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>userId</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.079819</td>\n",
       "      <td>0.207983</td>\n",
       "      <td>0.268749</td>\n",
       "      <td>-0.291636</td>\n",
       "      <td>-0.118773</td>\n",
       "      <td>0.469668</td>\n",
       "      <td>0.918559</td>\n",
       "      <td>-0.037987</td>\n",
       "      <td>...</td>\n",
       "      <td>0.091574</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.061503</td>\n",
       "      <td>-0.407556</td>\n",
       "      <td>-0.164871</td>\n",
       "      <td>0.066378</td>\n",
       "      <td>0.174557</td>\n",
       "      <td>0.268070</td>\n",
       "      <td>-0.175412</td>\n",
       "      <td>-0.032086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.991241</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.037796</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.387347</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.583333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.125000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.623288</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.079819</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.433200</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.791334</td>\n",
       "      <td>-0.333333</td>\n",
       "      <td>-0.395092</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.569562</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.207983</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.336525</td>\n",
       "      <td>0.148498</td>\n",
       "      <td>0.542861</td>\n",
       "      <td>0.117851</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.485794</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.222113</td>\n",
       "      <td>0.396641</td>\n",
       "      <td>0.090090</td>\n",
       "      <td>-0.080296</td>\n",
       "      <td>0.400124</td>\n",
       "      <td>0.144603</td>\n",
       "      <td>0.116518</td>\n",
       "      <td>-0.170501</td>\n",
       "      <td>-0.277350</td>\n",
       "      <td>-0.043786</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>0.268749</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.336525</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.043166</td>\n",
       "      <td>0.158114</td>\n",
       "      <td>0.028347</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.777714</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.153303</td>\n",
       "      <td>0.234743</td>\n",
       "      <td>0.067791</td>\n",
       "      <td>-0.364156</td>\n",
       "      <td>0.244321</td>\n",
       "      <td>0.231080</td>\n",
       "      <td>-0.020546</td>\n",
       "      <td>0.384111</td>\n",
       "      <td>0.040582</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>606</th>\n",
       "      <td>0.066378</td>\n",
       "      <td>0.583333</td>\n",
       "      <td>-0.791334</td>\n",
       "      <td>0.144603</td>\n",
       "      <td>0.244321</td>\n",
       "      <td>-0.049192</td>\n",
       "      <td>0.137771</td>\n",
       "      <td>0.253582</td>\n",
       "      <td>0.572700</td>\n",
       "      <td>-0.382955</td>\n",
       "      <td>...</td>\n",
       "      <td>0.290490</td>\n",
       "      <td>0.140613</td>\n",
       "      <td>0.318473</td>\n",
       "      <td>0.682949</td>\n",
       "      <td>0.167062</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.114191</td>\n",
       "      <td>0.240842</td>\n",
       "      <td>0.533002</td>\n",
       "      <td>0.389185</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>607</th>\n",
       "      <td>0.174557</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.333333</td>\n",
       "      <td>0.116518</td>\n",
       "      <td>0.231080</td>\n",
       "      <td>0.255639</td>\n",
       "      <td>0.402792</td>\n",
       "      <td>0.251280</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.241121</td>\n",
       "      <td>...</td>\n",
       "      <td>0.698241</td>\n",
       "      <td>0.217210</td>\n",
       "      <td>0.192787</td>\n",
       "      <td>0.035806</td>\n",
       "      <td>-0.299641</td>\n",
       "      <td>0.114191</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.200814</td>\n",
       "      <td>0.190117</td>\n",
       "      <td>0.106605</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>608</th>\n",
       "      <td>0.268070</td>\n",
       "      <td>-0.125000</td>\n",
       "      <td>-0.395092</td>\n",
       "      <td>-0.170501</td>\n",
       "      <td>-0.020546</td>\n",
       "      <td>0.125428</td>\n",
       "      <td>0.008081</td>\n",
       "      <td>0.434423</td>\n",
       "      <td>0.336625</td>\n",
       "      <td>-0.571043</td>\n",
       "      <td>...</td>\n",
       "      <td>0.473967</td>\n",
       "      <td>0.297646</td>\n",
       "      <td>0.086423</td>\n",
       "      <td>0.053986</td>\n",
       "      <td>-0.075673</td>\n",
       "      <td>0.240842</td>\n",
       "      <td>0.200814</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.488929</td>\n",
       "      <td>0.147606</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>609</th>\n",
       "      <td>-0.175412</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>-0.277350</td>\n",
       "      <td>0.384111</td>\n",
       "      <td>0.193649</td>\n",
       "      <td>0.420288</td>\n",
       "      <td>0.141860</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.188512</td>\n",
       "      <td>0.343303</td>\n",
       "      <td>0.641624</td>\n",
       "      <td>-0.550000</td>\n",
       "      <td>0.533002</td>\n",
       "      <td>0.190117</td>\n",
       "      <td>0.488929</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.521773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>610</th>\n",
       "      <td>-0.032086</td>\n",
       "      <td>0.623288</td>\n",
       "      <td>0.569562</td>\n",
       "      <td>-0.043786</td>\n",
       "      <td>0.040582</td>\n",
       "      <td>0.115580</td>\n",
       "      <td>0.341233</td>\n",
       "      <td>0.167931</td>\n",
       "      <td>0.615638</td>\n",
       "      <td>-0.205081</td>\n",
       "      <td>...</td>\n",
       "      <td>0.007025</td>\n",
       "      <td>0.049263</td>\n",
       "      <td>0.270908</td>\n",
       "      <td>0.310611</td>\n",
       "      <td>0.462274</td>\n",
       "      <td>0.389185</td>\n",
       "      <td>0.106605</td>\n",
       "      <td>0.147606</td>\n",
       "      <td>-0.521773</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>610 rows × 610 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "userId       1         2         3         4         5         6         7    \\\n",
       "userId                                                                         \n",
       "1       1.000000       NaN  0.079819  0.207983  0.268749 -0.291636 -0.118773   \n",
       "2            NaN  1.000000       NaN       NaN       NaN       NaN -0.991241   \n",
       "3       0.079819       NaN  1.000000       NaN       NaN       NaN       NaN   \n",
       "4       0.207983       NaN       NaN  1.000000 -0.336525  0.148498  0.542861   \n",
       "5       0.268749       NaN       NaN -0.336525  1.000000  0.043166  0.158114   \n",
       "...          ...       ...       ...       ...       ...       ...       ...   \n",
       "606     0.066378  0.583333 -0.791334  0.144603  0.244321 -0.049192  0.137771   \n",
       "607     0.174557       NaN -0.333333  0.116518  0.231080  0.255639  0.402792   \n",
       "608     0.268070 -0.125000 -0.395092 -0.170501 -0.020546  0.125428  0.008081   \n",
       "609    -0.175412       NaN       NaN -0.277350  0.384111  0.193649  0.420288   \n",
       "610    -0.032086  0.623288  0.569562 -0.043786  0.040582  0.115580  0.341233   \n",
       "\n",
       "userId       8         9         10   ...       601       602       603  \\\n",
       "userId                                ...                                 \n",
       "1       0.469668  0.918559 -0.037987  ...  0.091574  0.000000 -0.061503   \n",
       "2            NaN       NaN  0.037796  ... -0.387347       NaN -1.000000   \n",
       "3            NaN       NaN       NaN  ...       NaN       NaN  0.433200   \n",
       "4       0.117851       NaN  0.485794  ... -0.222113  0.396641  0.090090   \n",
       "5       0.028347       NaN -0.777714  ...  0.000000  0.153303  0.234743   \n",
       "...          ...       ...       ...  ...       ...       ...       ...   \n",
       "606     0.253582  0.572700 -0.382955  ...  0.290490  0.140613  0.318473   \n",
       "607     0.251280       NaN -0.241121  ...  0.698241  0.217210  0.192787   \n",
       "608     0.434423  0.336625 -0.571043  ...  0.473967  0.297646  0.086423   \n",
       "609     0.141860       NaN       NaN  ...  1.000000  0.188512  0.343303   \n",
       "610     0.167931  0.615638 -0.205081  ...  0.007025  0.049263  0.270908   \n",
       "\n",
       "userId       604       605       606       607       608       609       610  \n",
       "userId                                                                        \n",
       "1      -0.407556 -0.164871  0.066378  0.174557  0.268070 -0.175412 -0.032086  \n",
       "2            NaN       NaN  0.583333       NaN -0.125000       NaN  0.623288  \n",
       "3            NaN       NaN -0.791334 -0.333333 -0.395092       NaN  0.569562  \n",
       "4      -0.080296  0.400124  0.144603  0.116518 -0.170501 -0.277350 -0.043786  \n",
       "5       0.067791 -0.364156  0.244321  0.231080 -0.020546  0.384111  0.040582  \n",
       "...          ...       ...       ...       ...       ...       ...       ...  \n",
       "606     0.682949  0.167062  1.000000  0.114191  0.240842  0.533002  0.389185  \n",
       "607     0.035806 -0.299641  0.114191  1.000000  0.200814  0.190117  0.106605  \n",
       "608     0.053986 -0.075673  0.240842  0.200814  1.000000  0.488929  0.147606  \n",
       "609     0.641624 -0.550000  0.533002  0.190117  0.488929  1.000000 -0.521773  \n",
       "610     0.310611  0.462274  0.389185  0.106605  0.147606 -0.521773  1.000000  \n",
       "\n",
       "[610 rows x 610 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2. 计算相似度\n",
    "# 2.1 计算用户间的皮尔逊相关系数\n",
    "sim_user = user_movie_pivot.T.corr()\n",
    "sim_user"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3. 估计一号用户对一号电影的评分\n",
    "# 3.1 找出和一号用户成正相关的用户集，自身除外\n",
    "sim_user = sim_user[1].drop(1).dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "sim_user = sim_user[sim_user>0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "userId\n",
       "3    0.079819\n",
       "4    0.207983\n",
       "5    0.268749\n",
       "8    0.469668\n",
       "9    0.918559\n",
       "Name: 1, dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sim_user.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.2 去除没有对1号电影做评价的用户\n",
    "sim_user_index = (sim_user.index) & (user_movie_pivot[1][1:].dropna().index)\n",
    "sim_user = sim_user[sim_user.index.isin(sim_user_index)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.8723418156812968"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 3.2 构建分子和分母\n",
    "sum_up = 0\n",
    "sum_down = 0\n",
    "for user in sim_user.index:\n",
    "    sum_up += sim_user[user] * user_movie_pivot.loc[user][1]\n",
    "    sum_down += sim_user[user]\n",
    "\n",
    "# 3.3 进行估计\n",
    "sum_up/sum_down\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Int64Index([  5,  15,  17,  18,  19,  21,  27,  31,  32,  33,\n",
       "            ...\n",
       "            580, 590, 596, 597, 599, 600, 601, 606, 607, 608],\n",
       "           dtype='int64', name='userId', length=157)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sim_user.index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{1: 3, 2: 4, 3: 5}"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = (1, 2, 3)\n",
    "b = (3, 4, 5)\n",
    "dict(zip(a, b))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<zip object at 0x000002985033E688>\n"
     ]
    }
   ],
   "source": [
    "print(zip(a, b))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
